In [4]:
# Joe Kelley - jkelley38@gatech.edu
# CS-6440-O01 - Mini-Project 2 

# Imports 
import pandas as pd
import numpy as np
import plotly.offline as pyo
import plotly.express as px
import plotly.graph_objects as go
import plotly.io as pio
from plotly.subplots import make_subplots


# Set up Plotly's offline notebook integration so figures render inline.
# connected=False is the library default; it is spelled out here for clarity.
pyo.init_notebook_mode(connected=False)
In [5]:
# Source of the pneumonia data:
# https://catalog.data.gov/dataset/deaths-from-pneumonia-and-influenza-pi-and-all-deaths-by-state-and-region-national-center-
# The downloaded file was trimmed to national rows only (no state breakdown) and all ages only.

# Load the CSV and derive a 'Week' column in a single chain: the last two
# characters of the stringified 'MMWR Year/Week' value are kept so the week
# is easier to read on a graph axis.
df_pnaDeaths = pd.read_csv("../data/National_Pneumonia_Deaths.csv").assign(
    Week=lambda frame: frame['MMWR Year/Week'].astype(str).str[-2:]
)

display(df_pnaDeaths)
geoid Region State age season MMWR Year/Week Deaths from influenza Deaths from pneumonia Deaths from pneumonia and influenza All Deaths Pecent of deaths due to pneumonia or influenza pecent complete Week
0 National NaN NaN All 2017-18 201838 11 2808 2819 51480 5.475913 108.197858 38
1 National NaN NaN All 2017-18 201815 212 3588 3800 55165 6.888426 115.942791 15
2 National NaN NaN All 2012-13 201318 10 3504 3514 48368 7.265134 101.657226 18
3 National NaN NaN All 2009-10 201027 3 3215 3218 45580 7.060114 95.797560 27
4 National NaN NaN All 2010-11 201120 7 3575 3582 46635 7.680926 98.014901 20
... ... ... ... ... ... ... ... ... ... ... ... ... ...
489 National NaN NaN All 2016-17 201640 18 3022 3040 51403 5.914052 108.036024 40
490 National NaN NaN All 2016-17 201648 34 3515 3549 54594 6.500714 114.742694 48
491 National NaN NaN All 2015-16 201601 32 4245 4277 55843 7.658972 117.367774 01
492 National NaN NaN All 2015-16 201633 5 2866 2871 49809 5.764019 104.685842 33
493 National NaN NaN All 2016-17 201645 19 3209 3228 52533 6.144709 110.410996 45

494 rows × 13 columns

In [6]:
# Figure 1
# Historical U.S. pneumonia deaths: one point per row of df_pnaDeaths,
# plotted by week and coloured by flu season.

fig_pnaPrev = px.scatter(
    data_frame=df_pnaDeaths,
    x='Week',
    y='Deaths from pneumonia',
    color='season',
)
fig_pnaPrev.update_layout(title_text="Yearly U.S. Pneumonia Deaths by Week - ICD-10 codes J12.0-J18.9")

display(fig_pnaPrev)
In [7]:
# Source of the COVID-19 data:
# https://data.cdc.gov/NCHS/Provisional-COVID-19-Death-Counts-by-Week-Ending-D/r8kw-7aab

# Load the CSV and append "pna - pna_covid": the 'Pneumonia Deaths' count
# minus the 'Pneumonia and COVID-19 Deaths' count, row by row.
df_covidUS = pd.read_csv(
    "../data/Provisional_COVID-19_Death_Counts_by_Week_Ending_Date_and_State.csv"
).assign(
    **{
        "pna - pna_covid": lambda frame: (
            frame["Pneumonia Deaths"] - frame["Pneumonia and COVID-19 Deaths"]
        )
    }
)

display(df_covidUS)
Data as of Start week End Week Group State Indicator COVID-19 Deaths Total Deaths Percent of Expected Deaths Pneumonia Deaths Pneumonia and COVID-19 Deaths Influenza Deaths Pneumonia, Influenza, or COVID-19 Deaths Footnote pna - pna_covid
0 09/18/2020 02/01/2020 02/01/2020 By week United States Week-ending 0.0 58576.0 0.99 3796.0 0.0 479.0 4275.0 NaN 3796.0
1 09/18/2020 02/08/2020 02/08/2020 By week United States Week-ending 1.0 59296.0 0.99 3799.0 0.0 520.0 4320.0 NaN 3799.0
2 09/18/2020 02/15/2020 02/15/2020 By week United States Week-ending 0.0 58697.0 1.00 3824.0 0.0 558.0 4382.0 NaN 3824.0
3 09/18/2020 02/22/2020 02/22/2020 By week United States Week-ending 5.0 58734.0 1.01 3699.0 1.0 564.0 4267.0 NaN 3698.0
4 09/18/2020 02/29/2020 02/29/2020 By week United States Week-ending 9.0 59167.0 1.03 3822.0 5.0 654.0 4480.0 NaN 3817.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1777 09/18/2020 08/15/2020 08/15/2020 By week Puerto Rico Week-ending 64.0 287.0 0.52 76.0 45.0 0.0 95.0 NaN 31.0
1778 09/18/2020 08/22/2020 08/22/2020 By week Puerto Rico Week-ending 45.0 193.0 0.37 38.0 32.0 0.0 51.0 NaN 6.0
1779 09/18/2020 08/29/2020 08/29/2020 By week Puerto Rico Week-ending 23.0 113.0 0.21 29.0 17.0 0.0 35.0 NaN 12.0
1780 09/18/2020 09/05/2020 09/05/2020 By week Puerto Rico Week-ending 36.0 69.0 0.13 27.0 23.0 NaN 40.0 One or more data cells have counts between 1–9... 4.0
1781 09/18/2020 09/12/2020 09/12/2020 By week Puerto Rico Week-ending 10.0 12.0 0.02 NaN NaN 0.0 10.0 One or more data cells have counts between 1–9... NaN

1782 rows × 15 columns

In [8]:
# Figure 2
# 2020 pneumonia deaths per week-ending date, one colour per 'State' value
# (every row of df_covidUS is plotted).
fig_pna2020 = px.scatter(
    data_frame=df_covidUS,
    x='End Week',
    y='Pneumonia Deaths',
    color='State',
)
fig_pna2020.update_layout(title_text="2020 U.S. Pneumonia Deaths by Week - ICD-10 codes J12.0-J18.9")

display(fig_pna2020)
In [9]:
# Source of the comparison data:
# https://www.cdc.gov/flu/weekly/

# Weekly NCHS mortality figures, loaded unchanged from the CSV.
df_pnaCompare = pd.read_csv(filepath_or_buffer="../data/NCHSData37.csv")

display(df_pnaCompare)
Year Week Percent of Deaths Due to Pneumonia and Influenza Expected Threshold All Deaths Pneumonia Deaths Influenza Deaths
0 2013 40 6.617957 6.36132 6.77011 47492 3140 3
1 2013 41 6.652714 6.45326 6.86185 47304 3135 12
2 2013 42 6.779127 6.55439 6.96279 47602 3216 11
3 2013 43 6.622544 6.66322 7.07142 47746 3151 11
4 2013 44 6.730631 6.77811 7.18613 48777 3271 12
... ... ... ... ... ... ... ... ...
358 2020 33 10.072165 5.13191 5.46758 58061 5843 5
359 2020 34 9.366954 5.12719 5.46286 54372 5085 8
360 2020 35 8.507472 5.13611 5.47178 48381 4106 10
361 2020 36 6.918100 5.15856 5.49423 36325 2509 4
362 2020 37 5.285226 5.19422 5.52989 18126 957 1

363 rows × 8 columns

In [10]:
# Comparing U.S. Pneumonia Deaths in 2020 vs Previous 6 Flu Seasons
# Figure 3
#
# Built with graph_objects rather than plotly.express, so nothing is labelled
# automatically: axis titles and the colorbar title are set explicitly so the
# figure can be read on its own.

fig_pnaComparison = go.Figure(
    data=go.Scatter(
        x=df_pnaCompare['Week'],
        y=df_pnaCompare['Pneumonia Deaths'],
        mode='markers',
        marker=dict(
            # Marker colour follows the 'Year' column on a continuous scale.
            color=df_pnaCompare['Year'],
            colorscale='Sunset',
            showscale=True,
            colorbar=dict(title=dict(text='Year')),
        ),
    )
)
fig_pnaComparison.update_layout(
    title="U.S. Pneumonia Deaths 2020 vs Previous 6 Flu Seasons",
    xaxis_title="Week",
    yaxis_title="Pneumonia Deaths",
)

fig_pnaComparison.show()
In [13]:
# Figure 4
# 2020 COVID-19 deaths per week-ending date, one colour per 'State' value
# (every row of df_covidUS is plotted).
fig_covid2020 = px.scatter(
    data_frame=df_covidUS,
    x='End Week',
    y='COVID-19 Deaths',
    color='State',
)
fig_covid2020.update_layout(title_text="2020 U.S. Deaths involving COVID-19 - ICD-10 Code U07.1")

display(fig_covid2020)
In [12]:
# Figure 5
# 2020 deaths with both pneumonia and COVID-19 (ICD-10 codes J12.0-J18.9 and U07.1)
# per week-ending date, one colour per 'State' value.
fig_covid_pna2020 = px.scatter(
    data_frame=df_covidUS,
    x='End Week',
    y='Pneumonia and COVID-19 Deaths',
    color='State',
)
fig_covid_pna2020.update_layout(
    title_text="2020 U.S. Deaths with Pneumonia and COVID-19 - ICD-10 codes J12.0-J18.9 and U07.1"
)

display(fig_covid_pna2020)
In [14]:
# Figure 6
# 2020 influenza deaths (ICD-10 codes J09-J11) per week-ending date,
# one colour per 'State' value.
fig_inf2020 = px.scatter(
    data_frame=df_covidUS,
    x='End Week',
    y='Influenza Deaths',
    color='State',
)
fig_inf2020.update_layout(title_text="2020 U.S. Influenza Deaths - ICD-10 codes J09-J11")

display(fig_inf2020)
In [15]:
# Figure 7
# 2020 deaths with pneumonia, influenza, or COVID-19
# (ICD-10 codes J12.0-J18.9, J09-J11, U07.1) per week-ending date,
# one colour per 'State' value.
fig_covid_pna_inf_2020 = px.scatter(
    data_frame=df_covidUS,
    x='End Week',
    y='Pneumonia, Influenza, or COVID-19 Deaths',
    color='State',
)
fig_covid_pna_inf_2020.update_layout(
    title_text="2020 U.S. Deaths with Pneumonia, Influenza, or COVID-19 Deaths - ICD-10 codes J12.0-J18.9, J09-J11, U07.1"
)

display(fig_covid_pna_inf_2020)
In [16]:
# Figure 8
# Side-by-side recap of four of the measures plotted above, as two 1x2 subplot figures.


def _paired_scatter(left_col, right_col):
    """Return a 1x2 subplot figure with one marker scatter per df_covidUS column.

    Each panel is titled with its column name, plots that column against
    'End Week', and colours every marker by its own value on the 'deep'
    colorscale. The legend is hidden.
    """
    fig = make_subplots(rows=1, cols=2, subplot_titles=(left_col, right_col))
    for position, column in enumerate((left_col, right_col), start=1):
        fig.add_trace(
            go.Scatter(
                x=df_covidUS['End Week'],
                y=df_covidUS[column],
                mode='markers',
                marker=dict(color=df_covidUS[column], colorscale='deep'),
            ),
            row=1,
            col=position,
        )
    fig.update_layout(showlegend=False)
    return fig


subs1 = _paired_scatter('Pneumonia Deaths', 'COVID-19 Deaths')
subs2 = _paired_scatter('Pneumonia and COVID-19 Deaths', 'Pneumonia, Influenza, or COVID-19 Deaths')

subs1.show()
subs2.show()
In [17]:
# Figure 9
# Plot the derived "pna - pna_covid" column: deaths classified as pneumonia
# (ICD-10 codes J12.0-J18.9) minus those that also carry the COVID-19 code
# U07.1, to see what the pneumonia curve looks like without them.

fig_pna_noCovid = px.scatter(
    data_frame=df_covidUS,
    x='End Week',
    y='pna - pna_covid',
    color='State',
)
fig_pna_noCovid.update_layout(title_text="2020 U.S. Pneumonia Deaths Without COVID-19 ICD-10 Code U07.1")

display(fig_pna_noCovid)
In [18]:
# Combine the historical year-over-year U.S. pneumonia deaths with the Figure 9
# series (2020 pneumonia deaths without ICD-10 Code U07.1).

# The two data sets were merged by hand: it can be done programmatically, but the
# pandas data frame combination was giving me trouble, so for this comparison a
# two-minute copy in Excel was easier. The merged result lives in the CSV below.

# Load the merged CSV and derive 'Week' from the last two characters of the
# stringified 'MMWR Year/Week' value so it is easier to read on a graph axis.
df_pnaDeathsCombined = pd.read_csv("../data/National_Pneumonia_Deaths_Modified.csv").assign(
    Week=lambda frame: frame['MMWR Year/Week'].astype(str).str[-2:]
)

display(df_pnaDeathsCombined)
geoid Region State age season MMWR Year/Week Deaths from influenza Deaths from pneumonia Deaths from pneumonia and influenza All Deaths Pecent of deaths due to pneumonia or influenza pecent complete Week
0 National NaN NaN All 2009-10 201027 3.0 3215 3218.0 45580.0 7.060114 95.797560 27
1 National NaN NaN All 2009-10 200952 47.0 4310 4357.0 49617.0 8.781264 104.282306 52
2 National NaN NaN All 2009-10 200941 174.0 3703 3877.0 46453.0 8.346070 97.632384 41
3 National NaN NaN All 2009-10 201032 5.0 2928 2933.0 44400.0 6.605856 93.317500 32
4 National NaN NaN All 2009-10 201019 4.0 3464 3468.0 46362.0 7.480264 97.441125 19
... ... ... ... ... ... ... ... ... ... ... ... ... ...
522 NaN NaN NaN NaN 2020 33 NaN 2595 NaN NaN NaN NaN 33
523 NaN NaN NaN NaN 2020 34 NaN 2434 NaN NaN NaN NaN 34
524 NaN NaN NaN NaN 2020 35 NaN 2134 NaN NaN NaN NaN 35
525 NaN NaN NaN NaN 2020 36 NaN 1711 NaN NaN NaN NaN 36
526 NaN NaN NaN NaN 2020 37 NaN 955 NaN NaN NaN NaN 37

527 rows × 13 columns

In [19]:
# Figure 10
# Overlay the 2020 pneumonia series on the historical seasons to compare trends.

fig_pnaComb = px.scatter(
    data_frame=df_pnaDeathsCombined,
    x='Week',
    y='Deaths from pneumonia',
    color='season',
)
fig_pnaComb.update_layout(
    title_text="Annual U.S. Pneumonia Deaths by Week - ICD-10 codes J12.0-J18.9 NOT Including U07.1"
)

fig_pnaComb.show()
In [20]:
# Next: the CDC's Nationally Notifiable Infectious Diseases and Conditions,
# United States: Weekly Tables. The goal is to compare case counts during
# COVID-19's peak weeks against the same weeks in previous years.
# https://wonder.cdc.gov/nndss/nndss_weekly_tables_menu.asp

# Hepatitis A weekly case counts, loaded unchanged from the CSV.
df_hepA = pd.read_csv(filepath_or_buffer="../data/infectious disease/hepA/hepA.csv")

display(df_hepA)
Year Week Hepatitis Type A Cases
0 Y - 2016 13 14
1 Y - 2016 14 4
2 Y - 2016 15 11
3 Y - 2016 16 19
4 Y - 2016 17 18
5 Y - 2016 18 11
6 Y - 2017 13 17
7 Y - 2017 14 12
8 Y - 2017 15 13
9 Y - 2017 16 8
10 Y - 2017 17 8
11 Y - 2017 18 22
12 Y - 2018 13 52
13 Y - 2018 14 50
14 Y - 2018 15 47
15 Y - 2018 16 57
16 Y - 2018 17 81
17 Y - 2018 18 59
18 Y - 2019 13 190
19 Y - 2019 14 137
20 Y - 2019 15 131
21 Y - 2019 16 149
22 Y - 2019 17 192
23 Y - 2019 18 142
24 Y - 2020 13 18
25 Y - 2020 14 30
26 Y - 2020 15 28
27 Y - 2020 16 31
28 Y - 2020 17 38
29 Y - 2020 18 13
In [21]:
# Figure 11 - Hepatitis A
# Grouped bars: one group per week, one bar per 'Year' value.

fig_hepA = px.bar(
    data_frame=df_hepA,
    x='Week',
    y='Hepatitis Type A Cases',
    color='Year',
    barmode='group',
)
# Force a tick on every week instead of letting Plotly pick the spacing.
fig_hepA.update_xaxes(tickmode='linear', dtick=1.0)
fig_hepA.update_layout(title_text="Hepatitis A Weekly Cases YoY (Weeks 13-18)")

display(fig_hepA)
In [22]:
# Infectious Diseases Data - Chlamydia Trachomatis
# Weekly case counts, loaded unchanged from the CSV.

df_ct = pd.read_csv(filepath_or_buffer="../data/infectious disease/chlamydia/chlamydia_data.csv")

display(df_ct)
Year Week Chlamydia Trachomatis Cases
0 Y - 2016 13 14552
1 Y - 2016 14 16154
2 Y - 2016 15 16220
3 Y - 2016 16 14126
4 Y - 2016 17 15551
5 Y - 2016 18 16725
6 Y - 2017 13 14739
7 Y - 2017 14 15810
8 Y - 2017 15 12684
9 Y - 2017 16 15778
10 Y - 2017 17 15756
11 Y - 2017 18 15819
12 Y - 2018 13 12158
13 Y - 2018 14 14268
14 Y - 2018 15 15402
15 Y - 2018 16 13094
16 Y - 2018 17 13263
17 Y - 2018 18 16466
18 Y - 2019 13 11054
19 Y - 2019 14 11493
20 Y - 2019 15 9459
21 Y - 2019 16 11129
22 Y - 2019 17 12711
23 Y - 2019 18 11859
24 Y - 2020 13 5896
25 Y - 2020 14 6883
26 Y - 2020 15 4884
27 Y - 2020 16 7085
28 Y - 2020 17 6090
29 Y - 2020 18 6371
In [23]:
# Figure 12 - Chlamydia Trachomatis
# Grouped bars: one group per week, one bar per 'Year' value.

fig_ct = px.bar(
    data_frame=df_ct,
    x='Week',
    y='Chlamydia Trachomatis Cases',
    color='Year',
    barmode='group',
)
# Force a tick on every week instead of letting Plotly pick the spacing.
fig_ct.update_xaxes(tickmode='linear', dtick=1.0)
fig_ct.update_layout(title_text="Chlamydia Trachomatis Weekly Cases YoY (Weeks 13-18)")

display(fig_ct)
In [24]:
# Infectious Diseases Data - Giardiasis
# Weekly case counts, loaded unchanged from the CSV.

df_gd = pd.read_csv(filepath_or_buffer="../data/infectious disease/giardiasis/giardiasis_cases.csv")

display(df_gd)
Year Week Giardiasis Cases
0 Y - 2016 13 124
1 Y - 2016 14 141
2 Y - 2016 15 163
3 Y - 2016 16 143
4 Y - 2016 17 138
5 Y - 2016 18 138
6 Y - 2017 13 117
7 Y - 2017 14 126
8 Y - 2017 15 80
9 Y - 2017 16 93
10 Y - 2017 17 124
11 Y - 2017 18 131
12 Y - 2018 13 80
13 Y - 2018 14 106
14 Y - 2018 15 66
15 Y - 2018 16 88
16 Y - 2018 17 107
17 Y - 2018 18 75
18 Y - 2019 13 91
19 Y - 2019 14 83
20 Y - 2019 15 84
21 Y - 2019 16 117
22 Y - 2019 17 121
23 Y - 2019 18 72
24 Y - 2020 13 42
25 Y - 2020 14 34
26 Y - 2020 15 35
27 Y - 2020 16 34
28 Y - 2020 17 21
29 Y - 2020 18 39
In [25]:
# Figure 13 - Giardiasis
# Grouped bars: one group per week, one bar per 'Year' value.

fig_gd = px.bar(
    data_frame=df_gd,
    x='Week',
    y='Giardiasis Cases',
    color='Year',
    barmode='group',
)
# Force a tick on every week instead of letting Plotly pick the spacing.
fig_gd.update_xaxes(tickmode='linear', dtick=1.0)
fig_gd.update_layout(title_text="Giardiasis Weekly Cases YoY (Weeks 13-18)")

display(fig_gd)